3. Comparing administrative boundaries#

import sys, os, importlib, json, multiprocessing
import folium, shapely, rasterio

import pandas as pd
import geopandas as gpd
import numpy as np

from shapely.geometry import Polygon, Point, mapping
from shapely.ops import unary_union
from urllib.request import urlopen

sys.path.insert(0, "../../gostrocks/src/")
import GOSTRocks.ntlMisc as ntl
import GOSTRocks.rasterMisc as rMisc

from GOSTboundaries import boundary_helper

%load_ext autoreload
%autoreload 2
The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
geobounds_url = 'https://www.geoboundaries.org/api/current/gbOpen/{iso3}/ADM{lvl}/'
sel_iso3 = 'VNM'
official_wb_bounds = f"/home/wb411133/projects/BOUNDARIES/Data/WB_med_res/{sel_iso3}_medium_res.geojson"
high_res_bounds = '/home/wb411133/projects/BOUNDARIES/Data/HighRes/WB_GAD_ADM_Samples/shp/WB_GAD_ADM2.shp'
output_folder = "/home/wb411133/projects/BOUNDARIES/"
# Open official World Bank boundaries
selWB = gpd.read_file(official_wb_bounds)

4. Run boundary comparison to Geobounds#

comparer = boundary_helper.country_boundary(sel_iso3, selWB)
xx = comparer.generate_boundary_difference(big_thresh=10000000)
comparer.generate_summary_difference()
comparer.map_corrected_bounds()
comparer.write_output(os.path.join(output_folder, "COUNTRY_RES", sel_iso3))

5. Run comparison between WB data at different resolutions#

inH = gpd.read_file(high_res_bounds)

selWB = selWB.to_crs(4326)
selH = inH.loc[inH['ISO_A3'] == sel_iso3]
comparer = boundary_helper.country_boundary(sel_iso3, selWB, geoBounds=selH)
xx = comparer.generate_boundary_difference(inGeo_id='OBJECTID')
m = comparer.map_corrected_bounds(geobounds_label='High def WB')
m
../src/boundary_helper.py:150: UserWarning: Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.

  m = folium.Map(location=[selWB.centroid.y.values[0], selWB.centroid.x.values[0]], zoom_start=7, tiles="stamentoner", control_scale=True)
Make this Notebook Trusted to load map: File -> Trust Notebook
m.save(f'{sel_iso3}_boundary_comparison.html')

5.1. Run zonal stats on landcover data#

In order to evaluate the differences between the various admin datasets, zonal stats on a couple datasets

  1. ESA CCI 20m Africa Landcover

  2. VIIRS nighttime lights from the Lights Every Night database

esa_dataset = "/home/public/Data/GLOBAL/LANDCOVER/GLOBCOVER/2015/ESACCI-LC-L4-LCCS-Map-300m-P1Y-2015-v2.0.7.tif"
esa_legend = "/home/public/Data/GLOBAL/LANDCOVER/GLOBCOVER/2015/GLOBCOVER_LEGEND.csv"
ntl_files = ntl.aws_search_ntl()
inR = rasterio.open(esa_dataset)
inL = pd.read_csv(esa_legend, quotechar='"')
# Define the raster datasets to summarize within the admin boundaries
file_defs = [
    [ntl_files[-1], 'NTL', 'N'],
    [inR, 'LC', 'C', inL['Value'].values],
]
comparer = boundary_helper.country_boundary(sel_iso3, selWB, geoBounds=selH, verbose=True)
# Attach GEO_ID vairables from dataset 2 to dataset 1
wb_matched = comparer.match_datasets(comparer.wb_bounds, comparer.geoBounds, "OBJECTID", "OBJECTID")
comparer.wb_bounds = wb_matched #Update the WB_dataset in the comparer with the matched data
zonal_res = comparer.run_zonal(file_defs, z_geoB=True, z_wbB=True, z_corB=False)
13:26:15	Runnning zonal on NTL
13:26:17	Runnning zonal on LC
# Join the zonal res to the WB coarse boundaries
wb_mapped = comparer.wb_bounds.copy()
wb_mapped['NTL'] = zonal_res['NTL']['wbB']['NTL_SUM']

wb_high = comparer.geoBounds.copy()
wb_high['NTL_High'] = zonal_res['NTL']['geoB']['NTL_SUM'].values

# Identify the major landcover class 
wb_mapped['LC_MAX']    = zonal_res['LC']['wbB'].apply(lambda x: x.idxmax(), axis=1)
wb_high['LC_MAX_High'] = zonal_res['LC']['geoB'].apply(lambda x: x.idxmax(), axis=1).values
wb_mapped = wb_mapped.merge(wb_high.loc[:,['OBJECTID', 'NTL_High', 'LC_MAX_High']], left_on='geo_match_id', right_on='OBJECTID')
# Determine % different in nighttime lights brightness
wb_mapped['PER_NTL'] = wb_mapped.apply(lambda x: (x['NTL_High'] - x['NTL'])/x['NTL'], axis=1)
# Determine the major Landcover class in the input dataset
wb_mapped['LC_Match'] = wb_mapped.apply(lambda x: x['LC_MAX'] == x['LC_MAX_High'], axis=1)
wb_mapped['LC_MAX'] = wb_mapped['LC_MAX'].astype(str)
wb_mapped['LC_MAX_High'] = wb_mapped['LC_MAX'].astype(str)
for col in wb_mapped.items():
    if col[1].dtype == np.int64:
        #wb_mapped[col[0]] = wb_mapped[col[0]].astype(int)
        wb_mapped.drop([col[0]], axis=1, inplace=True)
        
for col in wb_mapped.items():
    if col[1].dtype == np.int64:
        print(col)
pd.DataFrame(wb_mapped).to_csv(os.path.join(output_folder, f'{sel_iso3}_wb_matched.csv'))
# Summarize LC differences
wb_mapped['LC_Match'].value_counts()
True     289
False      1
Name: LC_Match, dtype: int64
# Summarize differences in NTL
min_thresh = -1
for thresh in [-0.15, -0.05, 0.05, 0.15, 0.5, 1, 5]:
    xx = wb_mapped.loc[(wb_mapped['PER_NTL'] > min_thresh) & (wb_mapped['PER_NTL'] < thresh)]
    print(f'{xx.shape[0]} districts had NTL change between {min_thresh} and {thresh}')
    min_thresh = thresh
1 districts had NTL change between -1 and -0.15
7 districts had NTL change between -0.15 and -0.05
272 districts had NTL change between -0.05 and 0.05
6 districts had NTL change between 0.05 and 0.15
4 districts had NTL change between 0.15 and 0.5
0 districts had NTL change between 0.5 and 1
0 districts had NTL change between 1 and 5

6. Debugging#

272/wb_mapped.shape[0]
0.9379310344827586

7. Run summaries multiprocessing#

def compare_country(iso3, in_bounds):
    comparer = boundary_helper.country_boundary(iso3, in_bounds)
    xx = comparer.generate_boundary_difference(big_thresh=100, verbose=False)
    res = comparer.generate_summary_difference()
    return(comparer)
all_args = []
for cISO in ['KEN', 'VNM','CIV','NGA','UGA','ZIM','UZB']:
    all_args.append([cISO, inWB.loc[inWB['ISO3'] == cISO].copy().to_crs(4326)])
with multiprocessing.Pool(len(all_args)) as pool:
    res = pool.starmap(compare_country, all_args)